#define O_LARGEFILE 0
#endif
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- uint64_t lsec;
-};
-
struct tdaio_state {
int fd;
-
- /* libaio state */
- tap_aio_context_t aio_ctx;
- struct iocb iocb_list [MAX_AIO_REQS];
- struct iocb *iocb_free [MAX_AIO_REQS];
- struct pending_aio pending_aio[MAX_AIO_REQS];
- int iocb_free_count;
- struct iocb *iocb_queue[MAX_AIO_REQS];
- int iocb_queued;
- struct io_event aio_events[MAX_AIO_REQS];
+ tap_aio_context_t aio;
};
-#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
/*Get Image size, secsize*/
static int get_image_info(struct td_state *s, int fd)
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = prv->aio_ctx.pollfd;
+ dd->io_fd[0] = prv->aio.aio_ctx.pollfd;
}
/* Open the disk file and initialize aio state. */
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
DPRINTF("block-aio open('%s')", name);
- /* Initialize AIO */
- prv->iocb_free_count = MAX_AIO_REQS;
- prv->iocb_queued = 0;
-
- ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto done;
- }
- for (i=0;i<MAX_AIO_REQS;i++)
- prv->iocb_free[i] = &prv->iocb_list[i];
+ /* Initialize AIO */
+ ret = tap_aio_init(&prv->aio, 0, MAX_AIO_REQS);
+ if (ret != 0)
+ return ret;
/* Open the file */
o_flags = O_DIRECT | O_LARGEFILE |
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct iocb *io;
- struct pending_aio *pio;
struct td_state *s = dd->td_state;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- long ioidx;
-
- if (prv->iocb_free_count == 0)
- return -ENOMEM;
- io = prv->iocb_free[--prv->iocb_free_count];
-
- ioidx = IOCB_IDX(prv, io);
- pio = &prv->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->lsec = sector;
-
- io_prep_pread(io, prv->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- prv->iocb_queue[prv->iocb_queued++] = io;
- return 0;
+ return tap_aio_read(&prv->aio, prv->fd, size, offset, buf,
+ cb, id, sector, private);
}
int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *private)
{
- struct iocb *io;
- struct pending_aio *pio;
struct td_state *s = dd->td_state;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
int size = nb_sectors * s->sector_size;
uint64_t offset = sector * (uint64_t)s->sector_size;
- long ioidx;
-
- if (prv->iocb_free_count == 0)
- return -ENOMEM;
- io = prv->iocb_free[--prv->iocb_free_count];
-
- ioidx = IOCB_IDX(prv, io);
- pio = &prv->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->lsec = sector;
-
- io_prep_pwrite(io, prv->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- prv->iocb_queue[prv->iocb_queued++] = io;
- return 0;
+ return tap_aio_write(&prv->aio, prv->fd, size, offset, buf,
+ cb, id, sector, private);
}
-
+
int tdaio_submit(struct disk_driver *dd)
{
- int ret;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- if (!prv->iocb_queued)
- return 0;
-
- ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->iocb_queued = 0;
-
- return 0;
+ return tap_aio_submit(&prv->aio);
}
-
+
int tdaio_close(struct disk_driver *dd)
{
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- io_destroy(prv->aio_ctx.aio_ctx);
+ io_destroy(prv->aio.aio_ctx.aio_ctx);
close(prv->fd);
return 0;
struct io_event *ep;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;
- nr_events = tap_aio_get_events(&prv->aio_ctx);
+ nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
repeat:
- for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
+ for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
- pio = &prv->pending_aio[(long)io->data];
+ pio = &prv->aio.pending_aio[(long)io->data];
rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
- pio->lsec, io->u.c.nbytes >> 9,
+ pio->sector, io->u.c.nbytes >> 9,
pio->id, pio->private);
- prv->iocb_free[prv->iocb_free_count++] = io;
+ prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
}
if (nr_events) {
- nr_events = tap_aio_more_events(&prv->aio_ctx);
+ nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
goto repeat;
}
- tap_aio_continue(&prv->aio_ctx);
+ tap_aio_continue(&prv->aio.aio_ctx);
return rsp;
}
(l + (s - 1)) - ((l + (s - 1)) % s)); \
})
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- int nb_sectors;
- char *buf;
- uint64_t sector;
-};
-
+#undef IOCB_IDX
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
#define ZERO_TEST(_b) (_b | 0x00)
uint32_t l2_cache_counts[L2_CACHE_SIZE]; /*Cache access record*/
uint8_t *cluster_cache;
uint8_t *cluster_data;
- uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/
uint64_t cluster_cache_offset; /**/
uint32_t crypt_method; /*current crypt method, 0 if no
*key yet */
uint32_t crypt_method_header; /**/
AES_KEY aes_encrypt_key; /*AES key*/
AES_KEY aes_decrypt_key; /*AES key*/
- /* libaio state */
- tap_aio_context_t aio_ctx;
- int max_aio_reqs;
- struct iocb *iocb_list;
- struct iocb **iocb_free;
- struct pending_aio *pending_aio;
- int iocb_free_count;
- struct iocb **iocb_queue;
- int iocb_queued;
- struct io_event *aio_events;
+
+ /* libaio state */
+ tap_aio_context_t aio;
};
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
-static void free_aio_state(struct disk_driver *dd)
-{
- struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
-
- if (s->sector_lock)
- free(s->sector_lock);
- if (s->iocb_list)
- free(s->iocb_list);
- if (s->pending_aio)
- free(s->pending_aio);
- if (s->aio_events)
- free(s->aio_events);
- if (s->iocb_free)
- free(s->iocb_free);
- if (s->iocb_queue)
- free(s->iocb_queue);
-}
-
-static int init_aio_state(struct disk_driver *dd)
-{
- int i, ret;
- struct td_state *bs = dd->td_state;
- struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
- long ioidx;
-
- s->iocb_list = NULL;
- s->pending_aio = NULL;
- s->aio_events = NULL;
- s->iocb_free = NULL;
- s->iocb_queue = NULL;
-
- /*Initialize Locking bitmap*/
- s->sector_lock = calloc(1, bs->size);
-
- if (!s->sector_lock) {
- DPRINTF("Failed to allocate sector lock\n");
- goto fail;
- }
-
- /* A segment (i.e. a page) can span multiple clusters */
- s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
- MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
-
- /* Initialize AIO */
- s->iocb_free_count = s->max_aio_reqs;
- s->iocb_queued = 0;
-
- if (!(s->iocb_list = malloc(sizeof(struct iocb) * s->max_aio_reqs)) ||
- !(s->pending_aio = malloc(sizeof(struct pending_aio) * s->max_aio_reqs)) ||
- !(s->aio_events = malloc(sizeof(struct io_event) * s->max_aio_reqs)) ||
- !(s->iocb_free = malloc(sizeof(struct iocb *) * s->max_aio_reqs)) ||
- !(s->iocb_queue = malloc(sizeof(struct iocb *) * s->max_aio_reqs))) {
- DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
- s->max_aio_reqs);
- goto fail;
- }
-
- ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto fail;
- }
-
- for (i=0;i<s->max_aio_reqs;i++)
- s->iocb_free[i] = &s->iocb_list[i];
-
- DPRINTF("AIO state initialised\n");
-
- return 0;
-
- fail:
- return -1;
-}
-
static uint32_t gen_cksum(char *ptr, int len)
{
unsigned char *md;
return 0;
}
-static int async_read(struct tdqcow_state *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->iocb_free[--s->iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pread(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->iocb_queue[s->iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_write(struct tdqcow_state *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->iocb_free[--s->iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pwrite(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->iocb_queue[s->iocb_queued++] = io;
-
- return 1;
-}
-
-/*TODO: Fix sector span!*/
-static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
-{
- return (s->sector_lock[sector] ? 0 : 1);
-}
-
-static int aio_lock(struct tdqcow_state *s, uint64_t sector)
-{
- return ++s->sector_lock[sector];
-}
-
-static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
-{
- if (!s->sector_lock[sector]) return;
-
- --s->sector_lock[sector];
- return;
-}
-
/*
* The crypt function is compatible with the linux cryptoloop
* algorithm for < 4 GB images. NOTE: out_buf == in_buf is
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;
- dd->io_fd[0] = s->aio_ctx.pollfd;
+ dd->io_fd[0] = s->aio.aio_ctx.pollfd;
}
/* Open the disk file and initialize qcow state. */
int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
{
int fd, len, i, shift, ret, size, l1_table_size, o_flags;
+ int max_aio_reqs;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
char *buf;
}
end_xenhdr:
- if (init_aio_state(dd)!=0) {
+
+ /* A segment (i.e. a page) can span multiple clusters */
+ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
+ MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
+
+ if (tap_aio_init(&s->aio, bs->size, max_aio_reqs)!=0) {
DPRINTF("Unable to initialise AIO state\n");
- free_aio_state(dd);
+ tap_aio_free(&s->aio);
goto fail;
}
init_fds(dd);
fail:
DPRINTF("QCOW Open failed\n");
- free_aio_state(dd);
+ tap_aio_free(&s->aio);
free(s->l1_table);
free(s->l2_cache);
free(s->cluster_cache);
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->aio, sector + i))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
/*We store a local record of the request*/
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
if(!cluster_offset) {
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
ret = cb(dd, BLK_NOT_ALLOCATED,
sector, n, id, private);
if (ret == -EBUSY) {
} else
rsp += ret;
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
if (decompress_cluster(s, cluster_offset) < 0) {
rsp += cb(dd, -EIO, sector,
nb_sectors, id, private);
512 * n);
rsp += cb(dd, 0, sector, n, id, private);
} else {
- async_read(s, n * 512,
+ tap_aio_read(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster * 512),
buf, cb, id, sector, private);
}
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->aio, sector + i))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
/*We store a local record of the request*/
if (n > nb_sectors)
n = nb_sectors;
- if (s->iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector))
return cb(dd, -EBUSY, sector, nb_sectors, id, private);
cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
index_in_cluster+n);
if (!cluster_offset) {
DPRINTF("Ooops, no write cluster offset!\n");
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->aio, sector);
return cb(dd, -EIO, sector, nb_sectors, id, private);
}
encrypt_sectors(s, sector, s->cluster_data,
(unsigned char *)buf, n, 1,
&s->aes_encrypt_key);
- async_write(s, n * 512,
+ tap_aio_write(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
(char *)s->cluster_data, cb, id, sector,
private);
} else {
- async_write(s, n * 512,
+ tap_aio_write(&s->aio, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
buf, cb, id, sector, private);
}
int tdqcow_submit(struct disk_driver *dd)
{
- int ret;
- struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
-
- if (!prv->iocb_queued)
- return 0;
-
- ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->iocb_queued = 0;
+ struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
- return 0;
+ return tap_aio_submit(&prv->aio);
}
int tdqcow_close(struct disk_driver *dd)
close(fd);
}
- io_destroy(s->aio_ctx.aio_ctx);
+ io_destroy(s->aio.aio_ctx.aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
if (sid > MAX_IOFD) return 1;
- nr_events = tap_aio_get_events(&prv->aio_ctx);
+ nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
repeat:
- for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
+ for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;
- pio = &prv->pending_aio[(long)io->data];
+ pio = &prv->aio.pending_aio[(long)io->data];
- aio_unlock(prv, pio->sector);
+ tap_aio_unlock(&prv->aio, pio->sector);
if (prv->crypt_method)
encrypt_sectors(prv, pio->sector,
pio->sector, pio->nb_sectors,
pio->id, pio->private);
- prv->iocb_free[prv->iocb_free_count++] = io;
+ prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
}
if (nr_events) {
- nr_events = tap_aio_more_events(&prv->aio_ctx);
+ nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
goto repeat;
}
- tap_aio_continue(&prv->aio_ctx);
+ tap_aio_continue(&prv->aio.aio_ctx);
return rsp;
}
int64_t total_sectors;
-
- struct {
- tap_aio_context_t aio_ctx;
- int max_aio_reqs;
- struct iocb *iocb_list;
- struct iocb **iocb_free;
- struct pending_aio *pending_aio;
- int iocb_free_count;
- struct iocb **iocb_queue;
- int iocb_queued;
- struct io_event *aio_events;
-
- uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/
- } async;
+ tap_aio_context_t async;
/* Original qemu variables */
int cluster_bits;
static void check_refcounts(struct disk_driver *bs);
#endif
-static int init_aio_state(struct disk_driver *bs);
-static void free_aio_state(struct disk_driver *bs);
-
static int qcow_sync_read(struct disk_driver *dd, uint64_t sector,
int nb_sectors, char *buf, td_callback_t cb,
int id, void *prv);
static int qcow_open(struct disk_driver *bs, const char *filename, td_flag_t flags)
{
BDRVQcowState *s = bs->private;
- int len, i, shift, ret;
+ int len, i, shift, ret, max_aio_reqs;
QCowHeader header;
int fd, o_flags;
#ifdef USE_AIO
/* Initialize AIO */
- if (init_aio_state(bs)!=0) {
+
+ /* A segment (i.e. a page) can span multiple clusters */
+ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
+ MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
+
+ if (tap_aio_init(&s->async, bs->td_state->size, max_aio_reqs)) {
DPRINTF("Unable to initialise AIO state\n");
- free_aio_state(bs);
+ tap_aio_free(&s->async);
goto fail;
}
DPRINTF("qcow_open failed\n");
#ifdef USE_AIO
- free_aio_state(bs);
+ tap_aio_free(&s->async);
#endif
qcow_free_snapshots(bs);
#ifdef USE_AIO
-/*
- * General AIO helper functions
- */
-
-#define IOCB_IDX(_s, _io) ((_io) - (_s)->async.iocb_list)
-
-struct pending_aio {
- td_callback_t cb;
- int id;
- void *private;
- int nb_sectors;
- char *buf;
- uint64_t sector;
-};
-
-
-static int init_aio_state(struct disk_driver *dd)
-{
- int i, ret;
- struct td_state *bs = dd->td_state;
- struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
- long ioidx;
-
- s->async.iocb_list = NULL;
- s->async.pending_aio = NULL;
- s->async.aio_events = NULL;
- s->async.iocb_free = NULL;
- s->async.iocb_queue = NULL;
-
- /*Initialize Locking bitmap*/
- s->async.sector_lock = calloc(1, bs->size);
-
- if (!s->async.sector_lock) {
- DPRINTF("Failed to allocate sector lock\n");
- goto fail;
- }
-
- /* A segment (i.e. a page) can span multiple clusters */
- s->async.max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
- MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
-
- /* Initialize AIO */
- s->async.iocb_free_count = s->async.max_aio_reqs;
- s->async.iocb_queued = 0;
-
- if (!(s->async.iocb_list = malloc(sizeof(struct iocb) * s->async.max_aio_reqs)) ||
- !(s->async.pending_aio = malloc(sizeof(struct pending_aio) * s->async.max_aio_reqs)) ||
- !(s->async.aio_events = malloc(sizeof(struct io_event) * s->async.max_aio_reqs)) ||
- !(s->async.iocb_free = malloc(sizeof(struct iocb *) * s->async.max_aio_reqs)) ||
- !(s->async.iocb_queue = malloc(sizeof(struct iocb *) * s->async.max_aio_reqs)))
- {
- DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
- s->async.max_aio_reqs);
- goto fail;
- }
-
- ret = tap_aio_setup(&s->async.aio_ctx, s->async.aio_events, s->async.max_aio_reqs);
- if (ret < 0) {
- if (ret == -EAGAIN) {
- DPRINTF("Couldn't setup AIO context. If you are "
- "trying to concurrently use a large number "
- "of blktap-based disks, you may need to "
- "increase the system-wide aio request limit. "
- "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
- "aio-max-nr')\n");
- } else {
- DPRINTF("Couldn't setup AIO context.\n");
- }
- goto fail;
- }
-
- for (i=0;i<s->async.max_aio_reqs;i++)
- s->async.iocb_free[i] = &s->async.iocb_list[i];
-
- DPRINTF("AIO state initialised\n");
-
- return 0;
-
-fail:
- return -1;
-}
-
-static void free_aio_state(struct disk_driver *dd)
-{
- struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
-
- if (s->async.sector_lock)
- free(s->async.sector_lock);
- if (s->async.iocb_list)
- free(s->async.iocb_list);
- if (s->async.pending_aio)
- free(s->async.pending_aio);
- if (s->async.aio_events)
- free(s->async.aio_events);
- if (s->async.iocb_free)
- free(s->async.iocb_free);
- if (s->async.iocb_queue)
- free(s->async.iocb_queue);
-}
-
-static int async_read(struct BDRVQcowState *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->async.iocb_free[--s->async.iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->async.pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pread(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->async.iocb_queue[s->async.iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_write(struct BDRVQcowState *s, int size,
- uint64_t offset, char *buf, td_callback_t cb,
- int id, uint64_t sector, void *private)
-{
- struct iocb *io;
- struct pending_aio *pio;
- long ioidx;
-
- io = s->async.iocb_free[--s->async.iocb_free_count];
-
- ioidx = IOCB_IDX(s, io);
- pio = &s->async.pending_aio[ioidx];
- pio->cb = cb;
- pio->id = id;
- pio->private = private;
- pio->nb_sectors = size/512;
- pio->buf = buf;
- pio->sector = sector;
-
- io_prep_pwrite(io, s->fd, buf, size, offset);
- io->data = (void *)ioidx;
-
- s->async.iocb_queue[s->async.iocb_queued++] = io;
-
- return 1;
-}
-
-static int async_submit(struct disk_driver *dd)
-{
- int ret;
- struct BDRVQcowState *prv = (struct BDRVQcowState*) dd->private;
-
- if (!prv->async.iocb_queued)
- return 0;
-
- ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued, prv->async.iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->async.iocb_queued = 0;
-
- return 0;
-}
-
-/*TODO: Fix sector span!*/
-static int aio_can_lock(struct BDRVQcowState *s, uint64_t sector)
-{
- return (s->async.sector_lock[sector] ? 0 : 1);
-}
-
-static int aio_lock(struct BDRVQcowState *s, uint64_t sector)
-{
- return ++s->async.sector_lock[sector];
-}
-
-static void aio_unlock(struct BDRVQcowState *s, uint64_t sector)
-{
- if (!s->async.sector_lock[sector]) return;
-
- --s->async.sector_lock[sector];
- return;
-}
-
-
-
-
/*
* QCOW2 specific AIO functions
*/
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->async, sector + i))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
while (nb_sectors > 0) {
if (n > nb_sectors)
n = nb_sectors;
- if (s->async.iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async, sector))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
if (!cluster_offset) {
/* The requested sector is not allocated */
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
ret = cb(bs, BLK_NOT_ALLOCATED,
sector, n, id, private);
if (ret == -EBUSY) {
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* sync read for compressed clusters */
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
if (decompress_cluster(s, cluster_offset) < 0) {
rsp += cb(bs, -EIO, sector, nb_sectors, id, private);
goto done;
} else {
/* async read */
- async_read(s, n * 512,
+ tap_aio_read(&s->async, s->fd, n * 512,
(cluster_offset + index_in_cluster * 512),
buf, cb, id, sector, private);
}
/*Check we can get a lock*/
for (i = 0; i < nb_sectors; i++)
- if (!aio_can_lock(s, sector + i))
+ if (!tap_aio_can_lock(&s->async, sector + i))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
if (n > nb_sectors)
n = nb_sectors;
- if (s->async.iocb_free_count == 0 || !aio_lock(s, sector))
+ if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async, sector))
return cb(bs, -EBUSY, sector, nb_sectors, id, private);
if (!cluster_offset) {
DPRINTF("Ooops, no write cluster offset!\n");
- aio_unlock(s, sector);
+ tap_aio_unlock(&s->async, sector);
return cb(bs, -EIO, sector, nb_sectors, id, private);
}
// TODO Encryption
- async_write(s, n * 512,
+ tap_aio_write(&s->async, s->fd, n * 512,
(cluster_offset + index_in_cluster*512),
buf, cb, id, sector, private);
static int qcow_close(struct disk_driver *bs)
{
BDRVQcowState *s = bs->private;
-
+
+#ifdef USE_AIO
+ io_destroy(s->async.aio_ctx.aio_ctx);
+ tap_aio_free(&s->async);
+#else
close(s->poll_pipe[0]);
- close(s->poll_pipe[1]);
+ close(s->poll_pipe[1]);
+#endif
qemu_free(s->l1_table);
qemu_free(s->l2_cache);
static int qcow_submit(struct disk_driver *bs)
{
- int ret;
- struct BDRVQcowState *prv = (struct BDRVQcowState*)bs->private;
-
-
- fsync(prv->fd);
+ struct BDRVQcowState *s = (struct BDRVQcowState*) bs->private;
- if (!prv->async.iocb_queued)
- return 0;
-
- ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued, prv->async.iocb_queue);
-
- /* XXX: TODO: Handle error conditions here. */
-
- /* Success case: */
- prv->async.iocb_queued = 0;
-
- return 0;
+ fsync(s->fd);
+ return tap_aio_submit(&s->async);
}
pio = &prv->async.pending_aio[(long)io->data];
- aio_unlock(prv, pio->sector);
+ tap_aio_unlock(&prv->async, pio->sector);
if (prv->crypt_method)
encrypt_sectors(prv, pio->sector,
#include <unistd.h>
#include <errno.h>
#include <string.h>
+#include <stdlib.h>
/**
* We used a kernel patch to return an fd associated with the AIO context
static void *
tap_aio_completion_thread(void *arg)
{
- tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+ tap_aio_internal_context_t *ctx = (tap_aio_internal_context_t *) arg;
int command;
int nr_events;
int rc;
}
void
-tap_aio_continue(tap_aio_context_t *ctx)
+tap_aio_continue(tap_aio_internal_context_t *ctx)
{
int cmd = 0;
DPRINTF("Cannot write to command pipe\n");
}
-int
-tap_aio_setup(tap_aio_context_t *ctx,
+static int
+tap_aio_setup(tap_aio_internal_context_t *ctx,
struct io_event *aio_events,
int max_aio_events)
{
}
int
-tap_aio_get_events(tap_aio_context_t *ctx)
+tap_aio_get_events(tap_aio_internal_context_t *ctx)
{
int nr_events = 0;
return nr_events;
}
-int tap_aio_more_events(tap_aio_context_t *ctx)
+int tap_aio_more_events(tap_aio_internal_context_t *ctx)
{
return io_getevents(ctx->aio_ctx, 0,
ctx->max_aio_events, ctx->aio_events, NULL);
}
+/* Allocate and initialise the shared tapdisk AIO state: the per-sector
+ * lock bitmap, the iocb pool/queue arrays and the underlying kernel AIO
+ * context.  'sectors' sizes the lock bitmap (one byte per sector);
+ * 'max_aio_reqs' bounds the number of in-flight requests.  Returns 0 on
+ * success, -1 on failure; on failure the caller is expected to invoke
+ * tap_aio_free() to release any partial allocations. */
+int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
+		int max_aio_reqs)
+{
+	int i, ret;
+
+	ctx->iocb_list = NULL;
+	ctx->pending_aio = NULL;
+	ctx->aio_events = NULL;
+	ctx->iocb_free = NULL;
+	ctx->iocb_queue = NULL;
+
+	/* Initialize locking bitmap (zeroed => all sectors unlocked) */
+	ctx->sector_lock = calloc(1, sectors);
+
+	if (!ctx->sector_lock) {
+		DPRINTF("Failed to allocate sector lock\n");
+		goto fail;
+	}
+
+	/* Initialize AIO request bookkeeping */
+	ctx->max_aio_reqs = max_aio_reqs;
+	ctx->iocb_free_count = ctx->max_aio_reqs;
+	ctx->iocb_queued = 0;
+
+	if (!(ctx->iocb_list = malloc(sizeof(struct iocb) * ctx->max_aio_reqs)) ||
+	    !(ctx->pending_aio = malloc(sizeof(struct pending_aio) * ctx->max_aio_reqs)) ||
+	    !(ctx->aio_events = malloc(sizeof(struct io_event) * ctx->max_aio_reqs)) ||
+	    !(ctx->iocb_free = malloc(sizeof(struct iocb *) * ctx->max_aio_reqs)) ||
+	    !(ctx->iocb_queue = malloc(sizeof(struct iocb *) * ctx->max_aio_reqs)))
+	{
+		DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
+			ctx->max_aio_reqs);
+		goto fail;
+	}
+
+	ret = tap_aio_setup(&ctx->aio_ctx, ctx->aio_events, ctx->max_aio_reqs);
+	if (ret < 0) {
+		if (ret == -EAGAIN) {
+			DPRINTF("Couldn't setup AIO context. If you are "
+				"trying to concurrently use a large number "
+				"of blktap-based disks, you may need to "
+				"increase the system-wide aio request limit. "
+				"(e.g. 'echo 1048576 > /proc/sys/fs/"
+				"aio-max-nr')\n");
+		} else {
+			DPRINTF("Couldn't setup AIO context.\n");
+		}
+		goto fail;
+	}
+
+	/* All iocbs start out on the free list */
+	for (i = 0; i < ctx->max_aio_reqs; i++)
+		ctx->iocb_free[i] = &ctx->iocb_list[i];
+
+	DPRINTF("AIO state initialised\n");
+
+	return 0;
+
+fail:
+	return -1;
+}
+
+/* Release everything allocated by tap_aio_init().  Safe to call on a
+ * partially-initialised context: tap_aio_init() NULLs the pointers up
+ * front, and free(NULL) is a no-op, so no per-pointer guards are needed. */
+void tap_aio_free(tap_aio_context_t *ctx)
+{
+	free(ctx->sector_lock);
+	free(ctx->iocb_list);
+	free(ctx->pending_aio);
+	free(ctx->aio_events);
+	free(ctx->iocb_free);
+	free(ctx->iocb_queue);
+}
+
+/*TODO: Fix sector span!*/
+/* Return 1 if 'sector' has no in-flight request holding it (lock count
+ * is zero), 0 otherwise.  NOTE(review): checks a single sector only --
+ * see the TODO above about requests that span multiple sectors. */
+int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector)
+{
+	return (ctx->sector_lock[sector] ? 0 : 1);
+}
+
+/* Increment the lock count for 'sector' and return the new count
+ * (always non-zero, so callers can treat the result as success). */
+int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector)
+{
+	return ++ctx->sector_lock[sector];
+}
+
+/* Decrement the lock count for 'sector'; silently ignores an unlock of
+ * an already-unlocked sector rather than underflowing the counter. */
+void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector)
+{
+	if (!ctx->sector_lock[sector]) return;
+
+	--ctx->sector_lock[sector];
+	return;
+}
+
+
+/* Queue an asynchronous read of 'size' bytes from 'fd' at byte 'offset'
+ * into 'buf'.  The request is only queued here; it is not issued to the
+ * kernel until tap_aio_submit().  Completion details (cb, id, sector,
+ * private) are stashed in the pending_aio slot matching the iocb so the
+ * event loop can invoke the callback later.  Returns 0 on success or
+ * -ENOMEM when no free iocb is available. */
+int tap_aio_read(tap_aio_context_t *ctx, int fd, int size,
+		 uint64_t offset, char *buf, td_callback_t cb,
+		 int id, uint64_t sector, void *private)
+{
+	struct iocb *io;
+	struct pending_aio *pio;
+	long ioidx;
+
+	if (ctx->iocb_free_count == 0)
+		return -ENOMEM;
+
+	io = ctx->iocb_free[--ctx->iocb_free_count];
+
+	/* pending_aio[] is indexed in lock-step with iocb_list[] */
+	ioidx = IOCB_IDX(ctx, io);
+	pio = &ctx->pending_aio[ioidx];
+	pio->cb = cb;
+	pio->id = id;
+	pio->private = private;
+	pio->nb_sectors = size/512;
+	pio->buf = buf;
+	pio->sector = sector;
+
+	io_prep_pread(io, fd, buf, size, offset);
+	/* Stash the slot index in the iocb so completion can find pio */
+	io->data = (void *)ioidx;
+
+	ctx->iocb_queue[ctx->iocb_queued++] = io;
+
+	return 0;
+}
+
+/* Queue an asynchronous write of 'size' bytes from 'buf' to 'fd' at byte
+ * 'offset'.  Mirrors tap_aio_read(): the request is queued locally and
+ * issued by tap_aio_submit(); completion context is recorded in the
+ * pending_aio slot paired with the iocb.  Returns 0 on success or
+ * -ENOMEM when the iocb pool is exhausted. */
+int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
+		  uint64_t offset, char *buf, td_callback_t cb,
+		  int id, uint64_t sector, void *private)
+{
+	struct iocb *io;
+	struct pending_aio *pio;
+	long ioidx;
+
+	if (ctx->iocb_free_count == 0)
+		return -ENOMEM;
+
+	io = ctx->iocb_free[--ctx->iocb_free_count];
+
+	/* pending_aio[] is indexed in lock-step with iocb_list[] */
+	ioidx = IOCB_IDX(ctx, io);
+	pio = &ctx->pending_aio[ioidx];
+	pio->cb = cb;
+	pio->id = id;
+	pio->private = private;
+	pio->nb_sectors = size/512;
+	pio->buf = buf;
+	pio->sector = sector;
+
+	io_prep_pwrite(io, fd, buf, size, offset);
+	/* Stash the slot index in the iocb so completion can find pio */
+	io->data = (void *)ioidx;
+
+	ctx->iocb_queue[ctx->iocb_queued++] = io;
+
+	return 0;
+}
+
+/* Hand all locally queued iocbs to the kernel via io_submit() and reset
+ * the local queue.  Returns 0, including when nothing was queued.
+ * NOTE(review): the io_submit() return value is discarded -- a partial
+ * or failed submission still resets iocb_queued, leaking those slots'
+ * callbacks; the XXX below predates this refactoring. */
+int tap_aio_submit(tap_aio_context_t *ctx)
+{
+	int ret;
+
+	if (!ctx->iocb_queued)
+		return 0;
+
+	ret = io_submit(ctx->aio_ctx.aio_ctx, ctx->iocb_queued, ctx->iocb_queue);
+
+	/* XXX: TODO: Handle error conditions here. */
+
+	/* Success case: */
+	ctx->iocb_queued = 0;
+
+	return 0;
+}
#include <pthread.h>
#include <libaio.h>
+#include <stdint.h>
-struct tap_aio_context {
+#include "tapdisk.h"
+
+#define IOCB_IDX(_ctx, _io) ((_io) - (_ctx)->iocb_list)
+
+struct tap_aio_internal_context {
io_context_t aio_ctx;
struct io_event *aio_events;
int pollfd;
unsigned int poll_in_thread : 1;
};
+
+
+typedef struct tap_aio_internal_context tap_aio_internal_context_t;
+
+
+/* Per-request completion context, one slot per iocb in the pool
+ * (indexed identically to iocb_list via IOCB_IDX). */
+struct pending_aio {
+	td_callback_t cb;	/* completion callback to invoke */
+	int id;			/* caller-supplied request id */
+	void *private;		/* opaque pointer passed back to cb */
+	int nb_sectors;		/* request length in 512-byte sectors */
+	char *buf;		/* data buffer for the request */
+	uint64_t sector;	/* starting logical sector (also used for unlock) */
+};
+
+
+/* Shared AIO state factored out of the individual block drivers:
+ * wraps the kernel AIO context plus the iocb pool, submit queue and
+ * the per-sector lock bitmap.  Initialised by tap_aio_init(),
+ * released by tap_aio_free(). */
+struct tap_aio_context {
+	tap_aio_internal_context_t aio_ctx;	/* kernel AIO context + event pollfd */
+
+	int max_aio_reqs;		/* capacity of all pools below */
+	struct iocb *iocb_list;		/* backing storage for all iocbs */
+	struct iocb **iocb_free;	/* stack of currently unused iocbs */
+	struct pending_aio *pending_aio;	/* completion context, parallel to iocb_list */
+	int iocb_free_count;		/* number of entries in iocb_free */
+	struct iocb **iocb_queue;	/* iocbs queued but not yet submitted */
+	int iocb_queued;		/* number of entries in iocb_queue */
+	struct io_event *aio_events;	/* buffer for io_getevents() results */
+
+	/* Locking bitmap for AIO reads/writes */
+	uint8_t *sector_lock;
+};
typedef struct tap_aio_context tap_aio_context_t;
-int tap_aio_setup (tap_aio_context_t *ctx,
- struct io_event *aio_events,
- int max_aio_events);
-void tap_aio_continue (tap_aio_context_t *ctx);
-int tap_aio_get_events (tap_aio_context_t *ctx);
-int tap_aio_more_events(tap_aio_context_t *ctx);
+void tap_aio_continue (tap_aio_internal_context_t *ctx);
+int tap_aio_get_events (tap_aio_internal_context_t *ctx);
+int tap_aio_more_events(tap_aio_internal_context_t *ctx);
+
+
+int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
+ int max_aio_reqs);
+void tap_aio_free(tap_aio_context_t *ctx);
+
+int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector);
+int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector);
+void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector);
+
+
+int tap_aio_read(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private);
+int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
+ uint64_t offset, char *buf, td_callback_t cb,
+ int id, uint64_t sector, void *private);
+int tap_aio_submit(tap_aio_context_t *ctx);
#endif /* __TAPAIO_H__ */